From 16708d7e9a6927eaa32be310ae34beaa593409f5 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Tue, 10 Aug 2021 15:35:36 +0200
Subject: MINOR: http: add a new function http_validate_scheme() to validate a
 scheme

While http_parse_scheme() extracts a scheme from a URI by extracting
exactly the valid characters and stopping on delimiters, this new
function performs the same on a fixed-size string.

(cherry picked from commit adfc08e717db600c3ac44ca8f3178d861699b67c)
[wt: context adj]
Signed-off-by: Willy Tarreau <w@1wt.eu>
(cherry picked from commit 073e9c9c10897a05117f29cb9d3ebdbc13ff03b5)
[wt: context adj]
Signed-off-by: Willy Tarreau <w@1wt.eu>
(cherry picked from commit 0fb53c3c025fb158c51c515532f3f52bb2abcdea)
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 include/haproxy/http.h |  1 +
 src/http.c             | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/include/haproxy/http.h b/include/haproxy/http.h
index 1bd9da9ad..00a64c7b7 100644
--- a/include/haproxy/http.h
+++ b/include/haproxy/http.h
@@ -36,6 +36,7 @@ extern const uint8_t http_char_classes[256];
 enum http_meth_t find_http_meth(const char *str, const int len);
 int http_get_status_idx(unsigned int status);
 const char *http_get_reason(unsigned int status);
+int http_validate_scheme(const struct ist schm);
 struct ist http_get_authority(const struct ist uri, int no_userinfo);
 struct ist http_get_path(const struct ist uri);
 int http_header_match2(const char *hdr, const char *end,
diff --git a/src/http.c b/src/http.c
index 3d88aa274..12a72acce 100644
--- a/src/http.c
+++ b/src/http.c
@@ -458,6 +458,29 @@ const char *http_get_reason(unsigned int status)
 	}
 }
 
+/* Checks that <schm> is a well-formed URI scheme as defined by RFC3986#3.1 and
+ * returns non-zero if so, zero otherwise. <schm> must hold the scheme alone,
+ * without the "://" that follows it in a URI; an empty scheme is rejected.
+ *     Scheme = alpha *(alpha|digit|'+'|'-'|'.')
+ */
+int http_validate_scheme(const struct ist schm)
+{
+	size_t pos;
+
+	for (pos = 0; pos < schm.len; pos++) {
+		const int c = schm.ptr[pos];
+
+		if (likely((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
+			continue; // letters are accepted at any position
+		if (unlikely(pos == 0))
+			return 0; // anything else is forbidden as first char
+		if (!((c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.'))
+			return 0;
+	}
+	/* pos equals schm.len here, so an empty scheme yields zero */
+	return pos != 0;
+}
+
 /* Parse the uri and looks for the authority, between the scheme and the
  * path. if no_userinfo is not zero, the part before the '@' (including it) is
  * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
-- 
2.28.0

