1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
From: Samuel Henrique <samueloph@debian.org>
Date: Sun, 12 Oct 2025 14:39:46 +0100
Subject: [PATCH] Don't percent-decode '/' and '\' in output file name
Co-Authored-by: Sergio Durigan Junior <sergiodj@sergiodj.net>
Backported-by: Samuel Henrique <samueloph@debian.org>
* Modify wcurl patch to apply on curl sources by changing the location of the
wcurl script from wcurl to scripts/wcurl.
* Drop changes to wcurl's tests as they are not in the curl sources.
* Move the logical AND (&&) operators in the conditions of the if statement
to match the new approach, i.e., they are now written at the beginning of
the line instead of at the end.
* Pull fix from https://github.com/curl/wcurl/pull/75, prefixing values
in UNSAFE_PERCENT_ENCODE with "%".
---
scripts/wcurl | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/scripts/wcurl b/scripts/wcurl
index 84b981a..a70c3b8 100755
--- a/scripts/wcurl
+++ b/scripts/wcurl
@@ -113,6 +113,13 @@ readonly PER_URL_PARAMETERS="\
--remote-time \
--retry 5 "
+# Valid percent-encode codes that are considered unsafe to be decoded.
+# Space-separated list of percent-encoded sequences; hex digits must be
+# uppercase, since candidates are upper-cased before being compared.
+# 2F = /
+# 5C = \
+readonly UNSAFE_PERCENT_ENCODE="%2F %5C"
+
# Whether to invoke curl or not.
DRY_RUN="false"
@@ -137,6 +144,20 @@ is_subset_of()
esac
}
+# Indicate via exit code whether the percent-encoded sequence given in the
+# first parameter is safe to be decoded (0 = safe, 1 = unsafe).
+is_safe_percent_encode()
+{
+ upper_str=$(printf "%s" "${1}" | tr "[:lower:]" "[:upper:]")
+ for unsafe in ${UNSAFE_PERCENT_ENCODE}; do
+ if [ "${unsafe}" = "${upper_str}" ]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
# Print the given string percent-decoded.
percent_decode()
{
@@ -151,9 +172,10 @@ percent_decode()
decode_out="${decode_out}${decode_hex2}"
# Skip decoding if this is a control character (00-1F).
# Skip decoding if DECODE_FILENAME is not "true".
- if is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
- is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" && \
- [ "${DECODE_FILENAME}" = "true" ]; then
+ if [ "${DECODE_FILENAME}" = "true" ] \
+ && is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" \
+ && is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF" \
+ && is_safe_percent_encode "${decode_out}"; then
# Use printf to decode it into octal and then decode it to the final format.
decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")"
fi
|