File: gnatcoll-email-parser.adb

package info (click to toggle)
libgnatcoll 18-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 5,068 kB
  • sloc: ada: 40,393; python: 354; ansic: 310; makefile: 245; sh: 31
file content (373 lines) | stat: -rw-r--r-- 13,257 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
------------------------------------------------------------------------------
--                             G N A T C O L L                              --
--                                                                          --
--                     Copyright (C) 2006-2017, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

pragma Warnings (Off, "*internal GNAT unit*");
with Ada.Strings.Unbounded.Aux;
pragma Warnings (On, "*internal GNAT unit*");

with GNAT.Case_Util;             use GNAT.Case_Util;
with GNATCOLL.VFS;               use GNATCOLL.VFS;
with GNAT.Strings;               use GNAT.Strings;

package body GNATCOLL.Email.Parser is

   function Preserve_Header (Name : String) return Boolean;
   pragma Inline (Preserve_Header);
   --  Whether the given header should be preserved in the generated message

   procedure Parse_Payload (Msg : in out Message; Unparsed : String);
   --  Parse the payload, as read in Unparsed, into its various components,
   --  and store them in the message appropriately

   ---------------------
   -- Preserve_Header --
   ---------------------

   function Preserve_Header (Name : String) return Boolean is
      N : String := Name;
   begin
      To_Lower (N);

      case N (N'First) is
         when 'c' =>
            return N = "cc" or else N = "content-type";
         when 'd' =>
            return N = "date";
         when 'f' =>
            return N = "from";
         when 'i' =>
            return N = "in-reply-to";
         when 'm' =>
            return N = "message-id" or else N = "mime-version";
         when 'r' =>
            return N = "references" or else N = "reply-to";
         when 's' =>
            return N = "subject";
         when 't' =>
            return N = "to";
         when 'x' =>
            return True;  --  All X-* headers
         when others =>
            return False;
      end case;
   end Preserve_Header;

   -----------
   -- Parse --
   -----------

   procedure Parse
     (Str     : String;
      Msg     : out Message) is
   begin
      Full_Parse (Str, Msg, Store_Headers => True, Store_Payload => True,
                  Parse_Payload => True);
   end Parse;

   --------------------------
   -- Parse_Ignore_Headers --
   --------------------------

   procedure Parse_Ignore_Headers (Str : String; Msg : out Message) is
   begin
      Full_Parse (Str, Msg, Store_Headers => False, Store_Payload => True,
                  Parse_Payload => False);
   end Parse_Ignore_Headers;

   ---------------------------
   -- Parse_Minimal_Headers --
   ---------------------------

   procedure Parse_Minimal_Headers (Str : String; Msg : out Message) is
   begin
      Full_Parse (Str, Msg, Store_Headers => True, Store_Payload => True,
                  Parse_Payload => True, Filter => Preserve_Header'Access);
   end Parse_Minimal_Headers;

   ----------------------
   -- Parse_No_Payload --
   ----------------------

   procedure Parse_No_Payload (Str : String; Msg : out Message) is
   begin
      Full_Parse
        (Str, Msg, Store_Headers => True, Store_Payload => True,
         Parse_Payload => False);
   end Parse_No_Payload;

   --------------------------------------
   -- Parse_No_Payload_Minimal_Headers --
   --------------------------------------

   procedure Parse_No_Payload_Minimal_Headers
     (Str : String; Msg : out Message)
   is
   begin
      Full_Parse
        (Str, Msg, Store_Headers => True, Store_Payload => True,
         Parse_Payload => False,
         Filter        => Preserve_Header'Access);
   end Parse_No_Payload_Minimal_Headers;

   ----------------
   -- Full_Parse --
   ----------------

   procedure Full_Parse
     (Str           : String;
      Msg           : out Message;
      Store_Headers : Boolean := True;
      Store_Payload : Boolean := True;
      Parse_Payload : Boolean := True;
      Filter        : Header_Filter := null)
   is
      Index : Integer := Str'First;
      Stop  : constant Integer := Str'Last;
      Colon : Integer;
      Eol   : Integer;
      Next, Eol2 : Integer;
      Is_Continuation : Boolean;
      Value : Unbounded_String;

      function RTrim_CR (Item : String) return String is
        (if Item /= "" and then Item (Item'Last) = ASCII.CR
         then Item (Item'First .. Item'Last - 1) else Item);

      function LTrim_Space (Item : String) return String is
        (if Item /= "" and then Item (Item'First) = ' '
         then Item (Item'First + 1 .. Item'Last) else Item);

   begin
      Msg := New_Message (MIME_Type => "");

      --  Do we have an envelope for the message ?
      if Index + 4 < Str'Last
        and then Str (Index .. Index + 4) = "From "
      then
         Eol := Next_Occurrence (Str (Index .. Stop), ASCII.LF);
         Set_Envelope_From (Msg, Str (Index .. Eol - 1));
         Index := Eol + 1;
      end if;

      --  Find the headers block. It is defined as being the set of lines up
      --  to the first line that doesn't match the headers format. This can be
      --  an empty line (and should generally be the case according to
      --  RFC2822), but could be anything else, in which case the extra line
      --  is assumed to belong to the body

      while Index <= Stop loop
         Eol   := Next_Occurrence (Str (Index .. Stop), ASCII.LF);
         Colon := Next_Occurrence (Str (Index .. Eol), ':');
         exit when Colon > Eol;

         --  ??? Header names are characters between 33 and 126 inclusive. We
         --  should check

         --  Check for continuation lines: if the next line starts with a
         --  whitespace but contains other characters than whitespaces, it is
         --  part of the same header. We have this whitespace handling because
         --  of cases where the subject line is followed by the separator line,
         --  itself starting with a space. This is not full RFC2822 of course,
         --  but it is nice to handle this correctly anyway

         Value := To_Unbounded_String
                    (LTrim_Space (RTrim_CR (Str (Colon + 1 .. Eol - 1))));

         while Eol < Str'Last and then Is_Whitespace (Str (Eol + 1)) loop
            Next := Eol + 1;
            Is_Continuation := False;
            Eol2 := Next_Occurrence (Str (Next .. Stop), ASCII.LF);
            for F in Next + 1 .. Eol2 - 1 loop
               if not Is_Whitespace (Str (F)) then
                  Append (Value, ' ' & RTrim_CR (Str (F .. Eol2 - 1)));
                  Is_Continuation := True;
                  exit;
               end if;
            end loop;

            exit when not Is_Continuation;
            Eol  := Eol2;
         end loop;

         if Store_Headers
           and then (Filter = null or else Filter (Str (Index .. Colon - 1)))
         then
            Add_Header
              (Msg,
               Create (Name  => Str (Index .. Colon - 1),
                       Value => To_String (Value)));
         end if;

         Index := Eol + 1;
      end loop;

      --  A blank line is not part of the body, any other line is
      if Index <= Str'Last and then Str (Index) = ASCII.LF then
         Index := Index + 1;
      end if;

      if Store_Payload then
         if not Parse_Payload then

            --  Note: do not use Set_Text_Payload here, as this would reset
            --  the Content-Type header.

            Msg.Contents.Payload :=
              (Multipart => False,
               Text      => To_Unbounded_String (Str (Index .. Str'Last)));

         else
            Email.Parser.Parse_Payload (Msg, Str (Index .. Str'Last));
         end if;
      end if;

   exception
      when others =>
         Msg := Null_Message;
   end Full_Parse;

   -------------------
   -- Parse_Payload --
   -------------------

   procedure Parse_Payload (Msg : in out Message; Unparsed : String) is
      Boundary         : constant String := Get_Boundary (Msg);
      Length           : constant Natural := Boundary'Length;
      Index            : Integer := Unparsed'First;
      Tmp              : Integer;
      Is_Last_Boundary : Boolean := False;
      Is_Boundary      : Boolean;
      Start            : Integer := -1;
      Attachment       : Message;
   begin
      if Boundary = "" then
         Set_Text_Payload (Msg, Unparsed, MIME_Type => "");

      else
         while not Is_Last_Boundary
           and then Index + Length < Unparsed'Last
         loop
            if Unparsed (Index) = '-'
              and then Unparsed (Index + 1) = '-'
              and then Unparsed (Index + 2 .. Index + 1 + Length) = Boundary
            then
               Tmp := Index + 2 + Length;

               if Unparsed (Tmp) = '-'
                 and then Unparsed (Tmp + 1) = '-'
               then
                  Is_Last_Boundary := True;
                  Tmp := Tmp + 2;
               end if;

               Is_Boundary := True;
               while Tmp <= Unparsed'Last
                 and then Unparsed (Tmp) /= ASCII.LF
               loop
                  if not Is_Whitespace (Unparsed (Tmp)) then
                     --  Not a boundary after all
                     Is_Boundary := False;
                     Is_Last_Boundary := False;
                     exit;
                  end if;
                  Tmp := Tmp + 1;
               end loop;

               if Is_Boundary then
                  if Start /= -1 then
                     Full_Parse
                       (Str           => Unparsed (Start .. Index - 2),
                        Msg           => Attachment,
                        Store_Headers => True,
                        Store_Payload => True,
                        Parse_Payload => True);
                     if Attachment /= Null_Message then
                        Add_Payload (Msg, Attachment);
                     else
                        --  Should exit with error message I guess
                        null;
                     end if;

                  else
                     Set_Preamble
                       (Msg, Unparsed (Unparsed'First .. Index - 2));
                  end if;

                  Start := Tmp + 1;
                  Is_Last_Boundary := Is_Last_Boundary
                    or else Tmp + Length >= Unparsed'Last;
               end if;

               Index := Next_Occurrence
                 (Unparsed (Tmp .. Unparsed'Last), ASCII.LF) + 1;

            else
               Index := Next_Occurrence
                 (Unparsed (Index .. Unparsed'Last), ASCII.LF) + 1;
            end if;
         end loop;
      end if;

      if Index < Unparsed'Last and then Start /= -1 then
         Set_Epilogue (Msg, Unparsed (Start .. Unparsed'Last));
      end if;
   end Parse_Payload;

   --------------------------
   -- Full_Parse_From_File --
   --------------------------

   procedure Full_Parse_From_File
     (Filename      : Virtual_File;
      Msg           : out Message;
      Store_Headers : Boolean := True;
      Store_Payload : Boolean := True;
      Parse_Payload : Boolean := True;
      Filter        : Header_Filter := null)
   is
      Str  : GNAT.Strings.String_Access;
   begin
      Str := Read_File (Filename);
      Full_Parse (Str.all,
                  Msg, Store_Headers,
                  Store_Payload, Parse_Payload, Filter);
      Free (Str);
   end Full_Parse_From_File;

   -------------------
   -- Parse_Payload --
   -------------------

   procedure Parse_Payload (Msg : in out Message) is
      use Ada.Strings.Unbounded.Aux;

      Payload     : constant Unbounded_String := Msg.Contents.Payload.Text;
      Payload_Str : Big_String_Access;
      Payload_Len : Natural;
   begin
      Msg.Contents.Payload.Text := Null_Unbounded_String;
      Get_String (Payload, Payload_Str, Payload_Len);
      Parse_Payload (Msg, Payload_Str (1 .. Payload_Len));
   end Parse_Payload;

end GNATCOLL.Email.Parser;