File: 0003-Detect-reasonable-email-headers-too.patch

package info (click to toggle)
libpst 0.6.76-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 17,616 kB
  • sloc: ansic: 7,674; sh: 4,730; javascript: 1,950; cpp: 1,691; makefile: 137; xml: 3
file content (95 lines) | stat: -rw-r--r-- 3,033 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
From f57392a308817917eaac74aed27d2ceb27aba23d Mon Sep 17 00:00:00 2001
From: Paul Wise <pabs3@bonedaddy.net>
Date: Sun, 30 May 2021 09:51:26 +0800
Subject: [PATCH 3/3] Detect reasonable email headers too

RFC 5322 specifies the syntax of email headers. Most header fields are more
restricted in practice, though, so use a stricter check in case the headers
are bogus fragments of the body that merely happen to match RFC 5322.

Fixes: https://bugs.debian.org/984581
---
 src/readpst.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/readpst.c b/src/readpst.c
index 6f9455297..fbd90c762 100644
--- a/src/readpst.c
+++ b/src/readpst.c
@@ -1283,6 +1283,65 @@ int  header_match(char *header, char*field) {
     return 0;
 }
 
+// https://en.wikipedia.org/wiki/Email#Message_header
+// https://www.rfc-editor.org/rfc/rfc5322.html
+// https://www.iana.org/assignments/message-headers/message-headers.xhtml
+int  header_is_reasonable(char *header) // returns 1 if the first header field looks well-formed, else 0
+{
+    char *c; // cursor into header; assumes header is NUL-terminated - TODO confirm against callers
+#define C *c // shorthand for the character under the cursor; #undef'd again at the end of the function
+
+    // a NULL header is rejected outright
+    if (header) c = header;
+    else return 0;
+
+    // field names usually start with upper-case A-Z, so any other first character is rejected
+    if (C >= 'A' && C <= 'Z') c++;
+    else return 0;
+
+    while(1) {
+        // rest of the field name: only the commonly used characters - 0-9 A-Z a-z are accepted
+        if (
+            (C >= 'A' && C <= 'Z') ||
+            (C >= 'a' && C <= 'z') ||
+            (C >= '0' && C <= '9') ||
+            (C == '-')
+           ) {
+            c++;
+        // a colon ends the field name; step over it and move on to the field value
+        } else if (C == ':') {
+          c++;
+          goto parse_header_field_value;
+        // any other character (including the terminating NUL) marks the header as invalid
+        } else {
+          return 0;
+        }
+    }
+
+parse_header_field_value:
+    while(1) {
+        // field value characters: printable US-ASCII (codes 33-126) plus space and tab
+        if (
+            (C >= 33 && C <= 126) ||
+            (C == ' ' || C == '\t')
+           ) {
+            c++;
+        // a CRLF pair ends the current line of the field value
+        } else if (C == '\r' && *(c+1) == '\n') { // c+1 is only read once C is known to be '\r', i.e. not the NUL
+            c += 2; // step over the CRLF pair
+            // a leading space/tab on the next line means the value is folded and continues there
+            if (C == ' ' || C == '\t') c++;
+            else return 1; // first field is complete and acceptable; later fields are not examined
+        // any other character (including a NUL before the closing CRLF) marks the header as invalid
+        } else {
+          return 0;
+        }
+    }
+
+#undef C
+
+} // not reached: the loops above only leave via return or the goto
+
 int  valid_headers(char *header)
 {
     // headers are sometimes really bogus - they seem to be fragments of the
@@ -1303,6 +1362,7 @@ int  valid_headers(char *header)
         if (header_match(header, "X-ASG-Debug-ID: "               )) return 1;
         if (header_match(header, "X-Barracuda-URL: "              )) return 1;
         if (header_match(header, "X-x: "                          )) return 1;
+        if (header_is_reasonable(header)) return 1;
         if (strlen(header) > 2) {
             DEBUG_INFO(("Ignore bogus headers = %s\n", header));
         }
-- 
2.32.0