File: qmark.c

package info (click to toggle)
findutils 4.10.0-3
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 26,108 kB
sloc: ansic: 126,635; sh: 11,486; yacc: 1,852; makefile: 844; exp: 839; python: 66; sed: 16
file content (217 lines) | stat: -rw-r--r-- 5,918 bytes
/* qmark.c -- quote 'dangerous' filenames
   Derived from coreutils' ls.c.
   Copyright (C) 1985-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/
/* config.h must be included first. */
#include <config.h>

/* system headers. */
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

/* gnulib headers would go here if any needed to be included. */

/* find headers. */
#include "printquoted.h"



/*
   This comment, IN_CTYPE_DOMAIN and ISPRINT were borrowed from
   coreutils at Sun Jun  5 21:17:40 2005 UTC.

   Jim Meyering writes:

   "... Some ctype macros are valid only for character codes that
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
   using /bin/cc or gcc but without giving an ansi option).  So, all
   ctype uses should be through macros like ISPRINT...  If
   STDC_HEADERS is defined, then autoconf has verified that the ctype
   macros don't need to be guarded with references to isascii. ...
   Defining isascii to 1 should let any compiler worth its salt
   eliminate the && through constant folding."

   Bruno Haible adds:

   "... Furthermore, isupper(c) etc. have an undefined result if c is
   outside the range -1 <= c <= 255. One is tempted to write isupper(c)
   with c being of type `char', but this is wrong if c is an 8-bit
   character >= 128 which gets sign-extended to a negative value.
   The macro ISUPPER protects against this as well."

   (Actually that rule of ISUPPER is now taken by to_uchar).
*/

#if STDC_HEADERS
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* ISPRINT is defined in <sys/euc.h> on at least Solaris2.6 systems.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))





/* Convert a possibly-signed character to an unsigned character.  This is
 * a bit safer than casting to unsigned char, since it catches some type
 * errors that the cast doesn't.
 *
 * This code taken from coreutils' system.h header at
 * Sun Jun  5 21:05:21 2005 UTC.
 */
static inline unsigned char to_uchar (char ch)
{
  return ch;
}


static size_t
unibyte_qmark_chars (char *buf, size_t len)
{
  char *p = buf;
  char const *plimit = buf + len;

  while (p < plimit)
    {
      if (! ISPRINT (to_uchar (*p)))
	*p = '?';
      p++;
    }
  return len;
}





/* Scan BUF, replacing any dangerous-looking characters with question
 * marks.  This code is taken from the ls.c file in coreutils as at
 * Sun Jun  5 20:51:54 2005 UTC.
 *
 * This function may shrink the buffer.   Either way, the new length
 * is returned.
 */
size_t
qmark_chars (char *buf, size_t len)
{
  if (MB_CUR_MAX <= 1)
    {
      return unibyte_qmark_chars (buf, len);
    }
  else
    {
      char const *p = buf;
      char const *plimit = buf + len;
      char *q = buf;

      while (p < plimit)
	switch (*p)
	  {
	  case ' ': case '!': case '"': case '#': case '%':
	  case '&': case '\'': case '(': case ')': case '*':
	  case '+': case ',': case '-': case '.': case '/':
	  case '0': case '1': case '2': case '3': case '4':
	  case '5': case '6': case '7': case '8': case '9':
	  case ':': case ';': case '<': case '=': case '>':
	  case '?':
	  case 'A': case 'B': case 'C': case 'D': case 'E':
	  case 'F': case 'G': case 'H': case 'I': case 'J':
	  case 'K': case 'L': case 'M': case 'N': case 'O':
	  case 'P': case 'Q': case 'R': case 'S': case 'T':
	  case 'U': case 'V': case 'W': case 'X': case 'Y':
	  case 'Z':
	  case '[': case '\\': case ']': case '^': case '_':
	  case 'a': case 'b': case 'c': case 'd': case 'e':
	  case 'f': case 'g': case 'h': case 'i': case 'j':
	  case 'k': case 'l': case 'm': case 'n': case 'o':
	  case 'p': case 'q': case 'r': case 's': case 't':
	  case 'u': case 'v': case 'w': case 'x': case 'y':
	  case 'z': case '{': case '|': case '}': case '~':
	    /* These characters are printable ASCII characters.  */
	    *q++ = *p++;
	    break;
	  default:
	    /* If we have a multibyte sequence, copy it until we
	       reach its end, replacing each non-printable multibyte
	       character with a single question mark.  */
	    {
	      mbstate_t mbstate;
	      memset (&mbstate, 0, sizeof mbstate);
	      do
		{
		  wchar_t wc;
		  size_t bytes;
		  int w;

		  bytes = mbrtowc (&wc, p, plimit - p, &mbstate);

		  if (bytes == (size_t) -1)
		    {
		      /* An invalid multibyte sequence was
			 encountered.  Skip one input byte, and
			 put a question mark.  */
		      p++;
		      *q++ = '?';
		      break;
		    }

		  if (bytes == (size_t) -2)
		    {
		      /* An incomplete multibyte character
			 at the end.  Replace it entirely with
			 a question mark.  */
		      p = plimit;
		      *q++ = '?';
		      break;
		    }

		  if (bytes == 0)
		    /* A null wide character was encountered.  */
		    bytes = 1;

		  w = wcwidth (wc);
		  if (w >= 0)
		    {
		      /* A printable multibyte character.
			 Keep it.  */
		      for (; bytes > 0; --bytes)
			*q++ = *p++;
		    }
		  else
		    {
		      /* An unprintable multibyte character.
			 Replace it entirely with a question
			 mark.  */
		      p += bytes;
		      *q++ = '?';
		    }
		}
	      while (! mbsinit (&mbstate));
	    }
	    break;
	  }

      /* The buffer may have shrunk.  */
      len = q - buf;
      return len;
    }
}