1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
/*
* escape.c - printing handling
*
* Copyright © 2011-2025 Jim Warner <james.warner@comcast.net>
* Copyright © 2016-2024 Craig Small <csmall@dropbear.xyz>
* Copyright © 1998-2005 Albert Cahalan
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include "escape.h"
#include "readproc.h"
#include "nls.h"
/*
 * Argument guard for functions that fill a caller supplied buffer.
 *
 * NOTE: this expands to statements that may `return 0` from the
 * ENCLOSING function, so it can only be used inside a function whose
 * return type accepts 0.
 *
 *   - bytes <= 0       : returns 0, dst is left untouched
 *   - otherwise        : dst is first set to the empty string
 *   - bytes >= INT_MAX : returns 0 (dst stays the empty string)
 */
#define SECURE_ESCAPE_ARGS(dst, bytes) do { \
if ((bytes) <= 0) return 0; \
*(dst) = '\0'; \
if ((bytes) >= INT_MAX) return 0; \
} while (0)
/*
 * Classify the byte sequence found at s, of which at most size bytes
 * may be examined (size must be at least 1).
 *
 * Returns the sequence length (1 to 4) when it is well formed UTF-8 as
 * per RFC 3629, or -1 when it is invalid, incomplete or (unless
 * OFF_UNICODE_PUA is defined) part of a Unicode private use area.
 *
 * FIXME: not future-proof
 */
static int u8seqlen (const unsigned char *s, unsigned size) {
    unsigned lead;

    // 0xxxxxxx, U+0000 - U+007F
    if (s[0] <= 0x7f)
        return 1;

    // everything longer needs a 10xxxxxx continuation byte next
    if (size < 2 || (s[1] & 0xc0) != 0x80)
        return -1;
    // 110xxxxx 10xxxxxx, U+0080 - U+07FF
    if (s[0] >= 0xc2 && s[0] <= 0xdf)
        return 2;

    if (size < 3 || (s[2] & 0xc0) != 0x80)
        return -1;
#ifndef OFF_UNICODE_PUA
    {
        unsigned tri = ((unsigned)s[0] << 16) + ((unsigned)s[1] << 8) + (unsigned)s[2];
        /* 11101110 10000000 10000000, U+E000 - primary PUA begin
           11101111 10100011 10111111, U+F8FF - primary PUA end */
        if (tri >= 0xee8080 && tri <= 0xefa3bf)
            return -1;
    }
#endif
    // lead byte merged with the payload of the first continuation byte
    lead = (unsigned)s[0] << 6 | (s[1] & 0x3f);
    // 1110xxxx 10xxxxxx 10xxxxxx, U+0800 - U+FFFF minus U+D800 - U+DFFF
    if ((lead >= 0x3820 && lead <= 0x3b5f) || (lead >= 0x3b80 && lead <= 0x3bff))
        return 3;

    if (size < 4 || (s[3] & 0xc0) != 0x80)
        return -1;
#ifndef OFF_UNICODE_PUA
    {
        unsigned quad = ((unsigned)s[0] << 24) + ((unsigned)s[1] << 16)
                      + ((unsigned)s[2] << 8) + (unsigned)s[3];
        /* 11110011 10110000 10000000 10000000, U+F0000 - supplemental PUA-A begin
           11110011 10111111 10111111 10111101, U+FFFFD - supplemental PUA-A end */
        if (quad >= 0xf3b08080 && quad <= 0xf3bfbfbd)
            return -1;
        /* 11110100 10000000 10000000 10000000, U+100000 - supplemental PUA-B begin
           11110100 10001111 10111111 10111101, U+10FFFD - supplemental PUA-B end */
        if (quad >= 0xf4808080 && quad <= 0xf48fbfbd)
            return -1;
    }
#endif
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, U+010000 - U+10FFFF
    if (lead >= 0x3c10 && lead <= 0x3d0f)
        return 4;

    // invalid or incomplete sequence
    return -1;
}

/*
 * Walk size bytes of s as UTF-8 and overwrite, in place, the first
 * byte of anything objectionable with a '?'. After such a replacement
 * the scan resumes at the very next byte, so leftover continuation
 * bytes of a rejected sequence get replaced one by one as well.
 *
 * A byte is replaced when it starts:
 *   - a sequence that is not valid UTF-8 (see u8seqlen)
 *   - a control character below SPACE
 *   - DEL
 *   - one of the 32 multibyte control characters which begin at
 *     U+0080 (0xc280)
 */
static void u8charlen (unsigned char *s, unsigned size) {
    while (size) {
        int len = u8seqlen(s, size);
        // s[1] is only inspected for a 0xc2 lead, i.e. a valid 2 byte sequence
        int reject = len < 0
                  || s[0] < 0x20
                  || s[0] == 0x7f
                  || (s[0] == 0xc2 && s[1] >= 0x80 && s[1] <= 0x9f);

        if (reject) {
            s[0] = '?';
            len = 1;
        }
        s += len;
        size -= len;
    }
}
/*
 * Fallback for a bad locale or a corrupt string: every byte of the
 * NUL terminated str which isprint() rejects is overwritten with '?'.
 */
static inline void esc_all (unsigned char *str) {
    unsigned char *p;

    for (p = str; *p != '\0'; p++) {
        if (isprint(*p))
            continue;
        *p = '?';
    }
}
/*
 * Copy src into dst, truncated to fit bufsize bytes including the
 * terminating NUL, then replace unwanted bytes of the copy with '?'.
 *
 * The codeset is probed once per thread: with UTF-8 the copy goes
 * through u8charlen(), with anything else through esc_all().
 *
 * Returns the number of bytes stored in dst, not counting the NUL.
 * Returns 0 when SECURE_ESCAPE_ARGS rejects bufsize or when snprintf
 * fails.
 */
int escape_str (char *dst, const char *src, int bufsize) {
    // 0 = codeset not probed yet, 1 = UTF-8, -1 = anything else
    static __thread int utf_sw = 0;
    int len;

    if (!utf_sw) {
        const char *codeset = nl_langinfo(CODESET);

        if (codeset && !strcasecmp(codeset, "UTF-8"))
            utf_sw = 1;
        else
            utf_sw = -1;
    }

    // may return 0 from this function, see the macro
    SECURE_ESCAPE_ARGS(dst, bufsize);

    len = snprintf(dst, bufsize, "%s", src);
    if (len < 0) {
        dst[0] = '\0';
        return 0;
    }
    // snprintf reports the untruncated length, clamp to what was stored
    if (len > bufsize - 1)
        len = bufsize - 1;

    if (utf_sw >= 0)
        u8charlen((unsigned char *)dst, len);
    else
        esc_all((unsigned char *)dst);

    return len;
}
/*
 * Store the escaped command name of pp in outbuf, which has room for
 * bytes bytes including the terminating NUL.
 *
 * flags:
 *   ESC_BRACKETS - wrap the name in '[' and ']'
 *   ESC_DEFUNCT  - append " <defunct>", but only when pp->state is 'Z'
 *
 * Space for the decorations is reserved first, the command name gets
 * whatever remains (and is truncated by escape_str if needed).
 *
 * Returns the number of bytes stored, not counting the NUL. Returns 0
 * with an empty outbuf when there is no room for even one byte of the
 * command name, and 0 with outbuf untouched when bytes is not positive.
 */
int escape_command (char *outbuf, const proc_t *pp, int bytes, unsigned flags) {
    static const char defunct[] = " <defunct>";
    const int defunct_len = (int)sizeof(defunct) - 1;
    int overhead = 0;
    int end = 0;

    // a buffer without room for the NUL must not be written to at all,
    // which matches what escape_str does for such a size
    if (bytes <= 0)
        return 0;

    if (flags & ESC_BRACKETS)
        overhead += 2;
    if (flags & ESC_DEFUNCT) {
        if (pp->state == 'Z') overhead += defunct_len;
        else flags &= ~ESC_DEFUNCT;
    }
    if (overhead + 1 >= bytes) {
        // if no room for even one byte of the command name
        outbuf[0] = '\0';
        return 0;
    }
    if (flags & ESC_BRACKETS)
        outbuf[end++] = '[';
    end += escape_str(outbuf+end, pp->cmd, bytes-overhead);
    // we want "[foo] <defunct>", not "[foo <defunct>]"
    if (flags & ESC_BRACKETS)
        outbuf[end++] = ']';
    if (flags & ESC_DEFUNCT) {
        memcpy(outbuf+end, defunct, defunct_len);
        end += defunct_len;
    }
    outbuf[end] = '\0';
    return end; // bytes, not including the NUL
}
|