File: gnatcoll-json-utility.adb

package info (click to toggle)
libgnatcoll 18-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 5,068 kB
  • sloc: ada: 40,393; python: 354; ansic: 310; makefile: 245; sh: 31
file content (260 lines) | stat: -rw-r--r-- 8,895 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
------------------------------------------------------------------------------
--                             G N A T C O L L                              --
--                                                                          --
--                     Copyright (C) 2011-2018, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

pragma Ada_2012;

with Ada.Characters.Wide_Wide_Latin_1; use Ada.Characters.Wide_Wide_Latin_1;
with Interfaces;                       use Interfaces;

with GNAT.Encode_UTF8_String;
with GNAT.Decode_UTF8_String;

package body GNATCOLL.JSON.Utility is

   use Ada.Strings.Unbounded;

   To_Hex : constant array (Unsigned_16 range 0 .. 15) of Character :=
     "0123456789ABCDEF";
   --  Upper-case hexadecimal digit for each value in 0 .. 15. Used to format
   --  the four digits of a "\uXXXX" escape sequence.

   --------------------------------
   -- Escape_Non_Print_Character --
   --------------------------------

   function Escape_Non_Print_Character
     (C : Wide_Wide_Character) return String
   is
      --  Return the JSON "\uXXXX" escape for C. A code point above 16#FFFF#
      --  is returned as two consecutive escapes (high surrogate followed by
      --  low surrogate), i.e. twelve characters instead of six.

      Value : constant Unsigned_32 := Wide_Wide_Character'Pos (C);
      --  Code point of C

      function Unit_Image (Unit : Unsigned_16) return String;
      --  Return the six-character sequence "\uXXXX", where XXXX is Unit
      --  written as four upper-case hexadecimal digits.

      ----------------
      -- Unit_Image --
      ----------------

      function Unit_Image (Unit : Unsigned_16) return String is
         Nibble : constant Unsigned_16 := 16#F#;
         --  Mask selecting one hexadecimal digit

      begin
         return "\u"
           & To_Hex (Shift_Right (Unit, 12) and Nibble)
           & To_Hex (Shift_Right (Unit, 8) and Nibble)
           & To_Hex (Shift_Right (Unit, 4) and Nibble)
           & To_Hex (Unit and Nibble);
      end Unit_Image;

   begin
      if Value > 16#FFFF# then
         --  Does not fit in one 16-bit unit: split into a surrogate pair

         declare
            High_Unit : constant Unsigned_16 :=
              16#D800# + Unsigned_16 ((Value - 16#1_0000#) / 16#400#);
            Low_Unit  : constant Unsigned_16 :=
              16#DC00# + Unsigned_16 (Value mod 16#400#);
         begin
            return Unit_Image (High_Unit) & Unit_Image (Low_Unit);
         end;
      end if;

      return Unit_Image (Unsigned_16 (Value));
   end Escape_Non_Print_Character;

   -------------------
   -- Escape_String --
   -------------------

   function Escape_String (Text : UTF8_XString) return Unbounded_String is
      --  Return Text as a double-quoted JSON string literal. Text is decoded
      --  as UTF-8 one code point at a time; quote, backslash and the usual
      --  control characters get their short escapes, every other code point
      --  below 32 or at/above 16#80# is written as "\uXXXX", and the
      --  remaining characters are copied unchanged. A byte that cannot be
      --  decoded is consumed and emitted as "\u0000".

      Data   : GNATCOLL.Strings.Char_Array;
      Length : Natural;
      Result : Unbounded_String;
      Pos    : Natural := 1;
      Item   : Wide_Wide_Character;

      procedure Fetch;
      --  Decode the code point starting at Data (Pos) into Item and move
      --  Pos past it. If decoding fails, advance Pos by one and set Item
      --  to NUL.

      -----------
      -- Fetch --
      -----------

      procedure Fetch is
      begin
         --  A UTF-8 sequence is at most 4 bytes long according to RFC 3629,
         --  so only that many bytes are handed to the decoder.

         GNAT.Decode_UTF8_String.Decode_Wide_Wide_Character
           (String (Data (Pos .. Natural'Min (Length, Pos + 3))),
            Pos, Item);
      exception
         when Constraint_Error =>
            Pos  := Pos + 1;
            Item := NUL;
      end Fetch;

   begin
      Text.Get_String (Data, Length);

      Append (Result, '"');

      while Pos <= Length loop
         Fetch;

         case Item is
            when '"' =>
               Append (Result, "\""");
            when '\' =>
               Append (Result, "\\");
            when NUL =>
               Append (Result, "\u0000");
            when BS =>
               Append (Result, "\b");
            when HT =>
               Append (Result, "\t");
            when LF =>
               Append (Result, "\n");
            when FF =>
               Append (Result, "\f");
            when CR =>
               Append (Result, "\r");
            when others =>
               declare
                  Code : constant Natural := Wide_Wide_Character'Pos (Item);
               begin
                  if Code in 32 .. 16#7F# then
                     --  Plain 7-bit character: copy as is

                     Append (Result, Character'Val (Code));
                  else
                     Append (Result, Escape_Non_Print_Character (Item));
                  end if;
               end;
         end case;
      end loop;

      Append (Result, '"');
      return Result;
   end Escape_String;

   ----------------------
   -- Un_Escape_String --
   ----------------------

   function Un_Escape_String
     (Text : String;
      Low  : Natural;
      High : Natural) return UTF8_XString
   is
      --  Decode the JSON string stored in Text (Low .. High) and return its
      --  UTF-8 value. Leading and trailing blanks and one enclosing double
      --  quote on each side, if present, are dropped. Backslash escapes are
      --  replaced by the character they denote; a "\uXXXX" high surrogate
      --  immediately followed by a "\uXXXX" low surrogate yields a single
      --  non-BMP character.
      --
      --  Raises Invalid_JSON_Stream if an escape sequence is unknown,
      --  truncated, or contains a character that is not a hexadecimal digit.

      First : Integer;
      Last  : Integer;
      Unb   : UTF8_XString;
      Idx   : Natural;

      function Hex_Quad (From : Natural) return Unsigned_16;
      --  Return the value of the four hexadecimal digits
      --  Text (From .. From + 3). Raises Invalid_JSON_Stream if that slice
      --  extends past High or contains a non-hexadecimal character.

      --------------
      -- Hex_Quad --
      --------------

      function Hex_Quad (From : Natural) return Unsigned_16 is
         Result : Unsigned_16 := 0;
         Digit  : Unsigned_16;

      begin
         if From + 3 > High then
            raise Invalid_JSON_Stream with
              "Incomplete \u escape sequence at end of line";
         end if;

         for J in From .. From + 3 loop
            case Text (J) is
               when '0' .. '9' =>
                  Digit := Unsigned_16
                    (Character'Pos (Text (J)) - Character'Pos ('0'));
               when 'a' .. 'f' =>
                  Digit := Unsigned_16
                    (Character'Pos (Text (J)) - Character'Pos ('a') + 10);
               when 'A' .. 'F' =>
                  Digit := Unsigned_16
                    (Character'Pos (Text (J)) - Character'Pos ('A') + 10);
               when others =>
                  raise Invalid_JSON_Stream with
                    "Invalid hexadecimal digit '" & Text (J)
                    & "' in \u escape sequence";
            end case;

            Result := Result * 16 + Digit;
         end loop;

         return Result;
      end Hex_Quad;

   begin
      First := Low;
      Last  := High;

      --  Trim blanks and double quotes

      while First <= High and then Text (First) = ' ' loop
         First := First + 1;
      end loop;
      if First <= High and then Text (First) = '"' then
         First := First + 1;
      end if;

      while Last >= Low and then Text (Last) = ' ' loop
         Last := Last - 1;
      end loop;
      if Last >= Low and then Text (Last) = '"' then
         Last := Last - 1;
      end if;

      Idx := First;
      while Idx <= Last loop
         if Text (Idx) = '\' then
            Idx := Idx + 1;

            if Idx > High then
               raise Invalid_JSON_Stream with
                 "Unexpected escape character at end of line";
            end if;

            --  See http://tools.ietf.org/html/rfc4627 for the list of
            --  characters that can be escaped.

            case Text (Idx) is
               when 'u' | 'U' =>
                  declare
                     Lead  : constant Unsigned_16 := Hex_Quad (Idx + 1);
                     Trail : Unsigned_16;
                     Char  : Wide_Wide_Character :=
                       Wide_Wide_Character'Val (Lead);

                  begin
                     --  If the character is a high surrogate and the next
                     --  escape is a low surrogate, then together they
                     --  represent one non-BMP character. The bound check
                     --  comes first so that a high surrogate at the end of
                     --  the input is not read past High.

                     if Lead in 16#D800# .. 16#DBFF#
                       and then Idx + 10 <= High
                       and then Text (Idx + 5) = '\'
                       and then Text (Idx + 6) in 'u' | 'U'
                     then
                        Trail := Hex_Quad (Idx + 7);

                        --  Only merge with a real low surrogate; otherwise
                        --  the following escape is left for the next
                        --  iteration to decode on its own.

                        if Trail in 16#DC00# .. 16#DFFF# then
                           Char := Wide_Wide_Character'Val
                             (16#1_0000#
                              + Unsigned_32 (Lead and 16#03FF#) * 16#0400#
                              + Unsigned_32 (Trail and 16#03FF#));

                           --  Skip the "\uXXXX" of the trail surrogate

                           Idx := Idx + 6;
                        end if;
                     end if;

                     Unb.Append
                       (GNAT.Encode_UTF8_String.Encode_Wide_Wide_String
                          ((1 => Char)));

                     --  Skip the four hexadecimal digits; the increment at
                     --  the end of the loop steps past the last one.

                     Idx := Idx + 4;
                  end;

               when '"' =>
                  Unb.Append ('"');
               when '/' =>
                  Unb.Append ('/');
               when '\' =>
                  Unb.Append ('\');
               when 'b' =>
                  Unb.Append (ASCII.BS);
               when 'f' =>
                  Unb.Append (ASCII.FF);
               when 'n' =>
                  Unb.Append (ASCII.LF);
               when 'r' =>
                  Unb.Append (ASCII.CR);
               when 't' =>
                  Unb.Append (ASCII.HT);
               when others =>
                  raise Invalid_JSON_Stream with
                    "Unexpected escape sequence '\" &
                    Text (Idx) & "'";
            end case;

         else
            Unb.Append (Text (Idx));
         end if;

         Idx := Idx + 1;
      end loop;

      return Unb;
   end Un_Escape_String;

end GNATCOLL.JSON.Utility;