File: input_sources.ads

package info (click to toggle)
libxmlada1 1.0-2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 2,704 kB
  • ctags: 94
  • sloc: ada: 22,582; sh: 1,804; makefile: 142; xml: 140; perl: 128
file content (137 lines) | stat: -rw-r--r-- 6,267 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
-----------------------------------------------------------------------
--                XML/Ada - An XML suite for Ada95                   --
--                                                                   --
--                       Copyright (C) 2001-2002                     --
--                            ACT-Europe                             --
--                                                                   --
-- This library is free software; you can redistribute it and/or     --
-- modify it under the terms of the GNU General Public               --
-- License as published by the Free Software Foundation; either      --
-- version 2 of the License, or (at your option) any later version.  --
--                                                                   --
-- This library is distributed in the hope that it will be useful,   --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of    --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU --
-- General Public License for more details.                          --
--                                                                   --
-- You should have received a copy of the GNU General Public         --
-- License along with this library; if not, write to the             --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330,      --
-- Boston, MA 02111-1307, USA.                                       --
--                                                                   --
-- As a special exception, if other files instantiate generics from  --
-- this unit, or you link this unit with other files to produce an   --
-- executable, this  unit  does not  by itself cause  the resulting  --
-- executable to be covered by the GNU General Public License. This  --
-- exception does not however invalidate any other reasons why the   --
-- executable file  might be covered by the  GNU Public License.     --
-----------------------------------------------------------------------

--  <description>
--  This package provides a hierarchy of objects that return characters
--  that can then be used for different tasks.
--  It is not possible to go backward in the stream, nor to re-read previous
--  characters. This interface is intentionally kept minimal, so that it can
--  easily be used with files, sockets, ...
--
--  Input sources should try to automatically detect the appropriate encoding
--  to use, for instance by using the byte order mark, if present, of the
--  unicode stream (16#FFFE# or 16#FEFF#).
--  </description>

with Unicode;
with Unicode.CES;
with Unicode.CES.Basic_8bit;
with Unicode.CCS;

package Input_Sources is

   type Input_Source is abstract tagged limited private;
   --  General object for reading characters, one at a time.
   --  The type is abstract: Next_Char and Eof must be overridden by concrete
   --  descendants. It is also limited, so objects cannot be copied or
   --  assigned.

   type Input_Source_Access is access all Input_Source'Class;
   --  General access type that can designate any object in the Input_Source
   --  class hierarchy.

   procedure Next_Char
     (From : in out Input_Source;
      C    : out Unicode.Unicode_Char) is abstract;
   --  Return in C a single character read from From.
   --  This also increments the internal index, so that the next time this
   --  procedure is called the next character in the stream is returned.

   function Eof (From : Input_Source) return Boolean is abstract;
   --  Return True if there are no more characters to read from the stream.

   function Prolog_Size (From : Input_Source) return Natural;
   --  Return the number of characters that were ignored at the beginning
   --  of the stream (for instance because they indicated the encoding used
   --  in the file).

   procedure Set_Encoding
     (Input : in out Input_Source;
      Es    : Unicode.CES.Encoding_Scheme);
   --  Set the encoding scheme associated with the input stream.
   --  This can be used to convert from any type of encoding for the byte
   --  sequence (Utf8, Utf16, ..) and any character set (Latin-1, Unicode,..)
   --  to unicode characters.
   --  Input_Sources are encouraged to guess the encoding whenever possible,
   --  but you can override that default at any time.

   function Get_Encoding (Input : Input_Source)
      return Unicode.CES.Encoding_Scheme;
   --  Return the encoding scheme associated with the input.

   procedure Set_Character_Set
     (Input : in out Input_Source;
      Cs    : Unicode.CCS.Character_Set);
   --  Set the character set associated with the stream.
   --  It isn't possible to get the character set automatically for a stream.
   --  As a result, the default one is always considered to be Unicode.

   function Get_Character_Set (Input : Input_Source)
      return Unicode.CCS.Character_Set;
   --  Return the character set associated with the input.

   procedure Set_Stream_Encoding
     (Input    : in out Input_Sources.Input_Source'Class;
      Encoding : String);
   --  Set the encoding and the character set for the stream associated with
   --  Input.
   --  Input is class-wide, so this procedure is not a dispatching operation
   --  and applies to any descendant of Input_Source.
   --  Invalid_Encoding is raised if Encoding is unknown.
   --  NOTE(review): Invalid_Encoding is not declared in this spec; presumably
   --  it is the exception from the Unicode packages - confirm.
   --  Encoding should have the form given in an XML file in the "encoding="
   --  parameter, for instance "UTF-8", "UTF-16", "ISO-8859-1",...

   procedure Set_System_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the system ID associated with the input source.
   --  Although this is optional, it is still useful since it can be used to
   --  resolve relative URI's from documents.

   function Get_System_Id (Input : Input_Source)
      return Unicode.CES.Byte_Sequence;
   --  Return the system Id.

   procedure Set_Public_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the public ID associated with the input source.
   --  This will be provided as part of the location information, if it is
   --  given.

   function Get_Public_Id (Input : Input_Source)
      return Unicode.CES.Byte_Sequence;
   --  Return the public Id.

   procedure Close (Input : in out Input_Source);
   --  Free the memory allocated in the input.
   --  NOTE(review): presumably this releases the Public_Id and System_Id
   --  sequences held in the private record - confirm against the body.

private
   type Input_Source is abstract tagged limited record
      Prolog_Size : Natural := 0;
      --  Number of characters skipped at the start of the stream; 0 until
      --  a descendant sets it. Presumably the value reported by the
      --  Prolog_Size function (body not visible here).

      Es          : Unicode.CES.Encoding_Scheme :=
        Unicode.CES.Basic_8bit.Basic_8bit_Encoding;
      --  Encoding scheme of the byte sequence; defaults to the basic 8-bit
      --  encoding.

      Cs          : Unicode.CCS.Character_Set :=
        Unicode.CCS.Unicode_Character_Set;
      --  Character set of the stream; defaults to Unicode.

      Public_Id   : Unicode.CES.Byte_Sequence_Access;
      System_Id   : Unicode.CES.Byte_Sequence_Access;
      --  Identifiers of the source, held through access values. No explicit
      --  initial value is given here.
   end record;
end Input_Sources;