File: test-glibc.c

package info (click to toggle)
libidn2 2.3.8-4
links: PTS, VCS
area: main
in suites: forky, sid
size: 8,768 kB
sloc: ansic: 6,049; sh: 1,480; makefile: 499; xml: 50; perl: 15
file content (327 lines) | stat: -rw-r--r-- 9,471 bytes
parent folder | download | duplicates (2)
/* test-glibc.c -- API trace test extracted from the glibc AI_IDN tests.
   Copyright (C) 2019 Red Hat, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* Before changing test expectations in this file, please contact the
   glibc developers on the libc-alpha mailing list to check if these
   changes are benign and will not lead to glibc test suite failures.
   Thanks.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <wchar.h>

#include <uniconv.h>

#include <idn2.h>

/* This assumes that wchar_t uses a UTF-16 or UTF-32 encoding.  */

/* The word "naemchen" with U+00E4 (octal 0344) as its second character,
   as a wide string, followed by the byte sequences that
   determine_current_charset_kind compares the converted wide string
   against: one byte (0344) for Latin-1, two bytes (C3 A4) for UTF-8.  */
static const wchar_t L_naemchen[] =
  { L'n', 0344, L'm', L'c', L'h', L'e', L'n', 0 };
static const char *naemchen_latin1 = "n\344mchen";
static const char *naemchen_utf8 = "n\xC3\xA4mchen";

/* A two-character wide string (U+05E9, U+05DD) used as a probe:
   determine_current_charset_kind expects it to convert to shem_utf8
   in a UTF-8 locale and to an empty string in a Latin-1 locale.  */
static const wchar_t L_shem[] = { 0x05E9, 0x05DD, 0 };

/* The UTF-8 byte sequence expected for L_shem.  */
static const char *shem_utf8 = "\xD7\xA9\xD7\x9D";

/* Detected charset.  Note that charset_latin1 covers ISO-8859-1,
   ISO-8859-15 and CP1252 (see determine_current_charset_kind), and
   charset_neither is used for every other charset; main runs no
   tests for such locales.  */
enum charset_kind
{
  charset_utf8, charset_latin1, charset_neither
};

/* Convert the wide string INPUT to a multibyte string in the encoding
   of the current locale and return it in freshly allocated memory,
   which the caller must release with free.

   If INPUT cannot be represented in the current locale, an empty
   string is returned instead.  On memory allocation failure, an error
   message is printed and the process exits with EXIT_FAILURE.

   The required size is determined with a first, counting call to
   wcsrtombs, so the result is always NUL-terminated regardless of the
   length of INPUT.  */
static char * __attribute__((malloc)) wcsrtombs_strdup (const wchar_t *input)
{
  const wchar_t *src = input;
  mbstate_t state;

  /* First pass: a null destination only counts the bytes needed,
     excluding the terminating NUL.  */
  memset (&state, 0, sizeof (state));
  size_t length = wcsrtombs (NULL, &src, 0, &state);
  if (length == (size_t) -1)
    length = 0;			/* Not representable: empty result.  */

  char *result = malloc (length + 1);
  if (result == NULL)
    {
      puts ("error: memory allocation failure");
      exit (EXIT_FAILURE);
    }

  if (length > 0)
    {
      /* Second pass: restart from the initial state and convert for
	 real.  Any disagreement with the first pass is treated like a
	 conversion failure.  */
      src = input;
      memset (&state, 0, sizeof (state));
      if (wcsrtombs (result, &src, length + 1, &state) != length)
	length = 0;
    }
  result[length] = '\0';

  return result;
}

/* Name of the locale currently under test.  main assigns it from
   locale_candidates before calling setlocale; the check functions
   print it in their diagnostics.  */
static const char *locale;

static enum charset_kind
determine_current_charset_kind (void)
{
  const char *lc_string = locale_charset ();
  enum charset_kind expected;

  if (strcmp (lc_string, "UTF-8") == 0)
    expected = charset_utf8;
  else if (strcmp (lc_string, "ISO-8859-1") == 0
	   || strcmp (lc_string, "ISO-8859-15") == 0
	   || strcmp (lc_string, "CP1252") == 0)
    expected = charset_latin1;
  else
    expected = charset_neither;

  char *naemchen_bytes = wcsrtombs_strdup (L_naemchen);
  char *shem_bytes = wcsrtombs_strdup (L_shem);
  enum charset_kind actual;

  if (strcmp (naemchen_bytes, naemchen_utf8) == 0
      && strcmp (shem_bytes, shem_utf8) == 0)
    actual = charset_utf8;
  else if (strcmp (naemchen_bytes, naemchen_latin1) == 0
	   && strcmp (shem_bytes, "") == 0)
    actual = charset_latin1;
  else
    actual = charset_neither;

  free (shem_bytes);
  free (naemchen_bytes);

  if (expected != actual)
    {
      printf ("error: locale %s: expected charset %u (%s), got %u\n",
	      locale, expected, lc_string, actual);
      exit (EXIT_FAILURE);
    }

  return actual;
}

/* Number of failed checks recorded so far.  main returns EXIT_FAILURE
   if this is non-zero at the end of the run.  */
static int errors;

/* Evaluate the outcome of a libidn2 call that is expected to succeed.

   FUNC is the name of the function that was called and INPUT the
   string passed to it; both are used only in diagnostics.  RET is the
   return code of the call and ACTUAL the string it produced, which is
   compared with EXPECTED.

   A non-zero RET or a mismatch between ACTUAL and EXPECTED is
   reported on standard output and counted in errors.  ACTUAL is
   released with idn2_free only when RET is zero.  */
static void
check_success (const char *func, const char *input, const char *expected,
               int ret, char *actual)
{
  /* The call itself failed: report the return code and stop here,
     without touching ACTUAL.  */
  if (ret != 0)
    {
      printf ("error: locale %s: %s: input \"%s\": %d\n",
              locale, func, input, ret);
      ++errors;
      return;
    }

  /* The call succeeded; verify the produced string.  */
  const bool matches = strcmp (actual, expected) == 0;
  if (!matches)
    {
      printf ("error: locale %s: %s: input \"%s\": \"%s\"\n",
              locale, func, input, actual);
      ++errors;
    }

  idn2_free (actual);
}

/* Call idn2_lookup_ul on INPUT with flags 0 and let check_success
   verify that the call succeeds and yields EXPECTED.  */
static void
check_lookup_ul_success (const char *input, const char *expected)
{
  char *output = NULL;
  const int rc = idn2_lookup_ul (input, &output, 0);

  check_success ("idn2_lookup_ul", input, expected, rc, output);
}

/* Call idn2_to_unicode_lzlz on INPUT with flags 0 and let
   check_success verify that the call succeeds and yields EXPECTED.  */
static void
check_to_unicode_lzlz_success (const char *input, const char *expected)
{
  char *output = NULL;
  const int rc = idn2_to_unicode_lzlz (input, &output, 0);

  check_success ("idn2_to_unicode_lzlz", input, expected, rc, output);
}

/* Call idn2_to_unicode_lzlz on INPUT with flags 0 and verify that it
   fails with the return code EXPECTED.

   An unexpected success is reported together with the produced
   string, which is then released with idn2_free.  A failure with a
   different return code is reported with both codes.  Either case is
   counted in errors.  */
static void
check_to_unicode_lzlz_failure (const char *input, int expected)
{
  char *unexpected = NULL;
  int actual = idn2_to_unicode_lzlz (input, &unexpected, 0);

  if (actual == 0)
    {
      /* The format string is split across two literals; the trailing
	 space after the colon keeps the joined message readable.  */
      printf ("error: idn2_to_unicode_lzlz: locale %s: "
	      "unexpected success for input \"%s\": \"%s\"\n",
	      locale, input, unexpected);
      ++errors;
      idn2_free (unexpected);
    }
  else if (actual != expected)
    {
      printf ("error: idn2_to_unicode_lzlz: locale %s: "
	      "expected failure %d for input \"%s\", actual %d\n",
	      locale, expected, input, actual);
      ++errors;
    }
}

/* Test vectors that main runs when the current locale was classified
   as charset_utf8.  Non-ASCII input and expected output are spelled
   as UTF-8 byte sequences using octal escapes.  The vectors are kept
   in tables and executed in table order: first the idn2_lookup_ul
   checks, then the idn2_to_unicode_lzlz successes, then the
   idn2_to_unicode_lzlz failures.  */
static void
run_utf8_tests (void)
{
  struct vector
  {
    const char *input;
    const char *expected;
  };

  /* Locale-encoded name -> ASCII-compatible encoding.  */
  static const struct vector lookup_vectors[] = {
    { "\327\251\327\2351.example", "xn--1-qic9a.example" },
    { "\327\251\327\235.example", "xn--iebx.example" },
    { "both.cname.idn-cname.n\303\244mchen.example",
      "both.cname.idn-cname.xn--nmchen-bua.example" },
    { "bu\303\237e.example", "xn--bue-6ka.example" },
    { "n\303\244mchen.example", "xn--nmchen-bua.example" },
    { "n\303\244mchen_zwo.example", "xn--nmchen_zwo-q5a.example" },
    { "with.cname.n\303\244mchen.example",
      "with.cname.xn--nmchen-bua.example" },
    { "With.idn-cname.n\303\244mchen.example",
      "with.idn-cname.xn--nmchen-bua.example" },
  };

  /* ASCII-compatible encoding -> locale-encoded name.  */
  static const struct vector to_unicode_vectors[] = {
    { "non-idn-cname.example", "non-idn-cname.example" },
    { "non-idn.example", "non-idn.example" },
    { "non-idn-name.example", "non-idn-name.example" },
    { "xn--1-qic9a.example", "\327\251\327\2351.example" },
    { "xn--anderes-nmchen-eib.example", "anderes-n\303\244mchen.example" },
    { "xn--bue-6ka.example", "bu\303\237e.example" },
    { "xn--iebx.example", "\327\251\327\235.example" },
    { "xn--nmchen-bua.example", "n\303\244mchen.example" },
    { "xn--nmchen_zwo-q5a.example", "n\303\244mchen_zwo.example" },
  };

  /* Inputs expected to be rejected with IDN2_PUNYCODE_BAD_INPUT.  */
  static const char *const bad_punycode_inputs[] = {
    "xn---.example",
    "xn--x.example",
  };

  size_t i;

  for (i = 0; i < sizeof (lookup_vectors) / sizeof (lookup_vectors[0]); ++i)
    check_lookup_ul_success (lookup_vectors[i].input,
                             lookup_vectors[i].expected);

  for (i = 0;
       i < sizeof (to_unicode_vectors) / sizeof (to_unicode_vectors[0]);
       ++i)
    check_to_unicode_lzlz_success (to_unicode_vectors[i].input,
                                   to_unicode_vectors[i].expected);

  for (i = 0;
       i < sizeof (bad_punycode_inputs) / sizeof (bad_punycode_inputs[0]);
       ++i)
    check_to_unicode_lzlz_failure (bad_punycode_inputs[i],
                                   IDN2_PUNYCODE_BAD_INPUT);
}

/* Test vectors that main runs when the current locale was classified
   as charset_latin1.  Non-ASCII input and expected output are spelled
   as single bytes using octal escapes.  The vectors are kept in
   tables and executed in table order: first the idn2_lookup_ul
   checks, then the idn2_to_unicode_lzlz successes, then the
   idn2_to_unicode_lzlz failures.  */
static void
run_latin1_tests (void)
{
  struct vector
  {
    const char *input;
    const char *expected;
  };
  struct failure_vector
  {
    const char *input;
    int expected_rc;
  };

  /* Locale-encoded name -> ASCII-compatible encoding.  */
  static const struct vector lookup_vectors[] = {
    { "both.cname.idn-cname.n\344mchen.example",
      "both.cname.idn-cname.xn--nmchen-bua.example" },
    { "bu\337e.example", "xn--bue-6ka.example" },
    { "n\344mchen.example", "xn--nmchen-bua.example" },
    { "n\344mchen_zwo.example", "xn--nmchen_zwo-q5a.example" },
    { "with.cname.n\344mchen.example",
      "with.cname.xn--nmchen-bua.example" },
    { "With.idn-cname.n\344mchen.example",
      "with.idn-cname.xn--nmchen-bua.example" },
  };

  /* ASCII-compatible encoding -> locale-encoded name.  */
  static const struct vector to_unicode_vectors[] = {
    { "non-idn-cname.example", "non-idn-cname.example" },
    { "non-idn.example", "non-idn.example" },
    { "non-idn-name.example", "non-idn-name.example" },
    { "xn--anderes-nmchen-eib.example", "anderes-n\344mchen.example" },
    { "xn--bue-6ka.example", "bu\337e.example" },
    { "xn--nmchen-bua.example", "n\344mchen.example" },
    { "xn--nmchen_zwo-q5a.example", "n\344mchen_zwo.example" },
  };

  /* Inputs expected to fail, each with its expected return code.  The
     first two are the names that run_utf8_tests expects to decode
     successfully.  */
  static const struct failure_vector failure_vectors[] = {
    { "xn--1-qic9a.example", IDN2_ENCODING_ERROR },
    { "xn--iebx.example", IDN2_ENCODING_ERROR },
    { "xn---.example", IDN2_PUNYCODE_BAD_INPUT },
    { "xn--x.example", IDN2_PUNYCODE_BAD_INPUT },
  };

  size_t i;

  for (i = 0; i < sizeof (lookup_vectors) / sizeof (lookup_vectors[0]); ++i)
    check_lookup_ul_success (lookup_vectors[i].input,
                             lookup_vectors[i].expected);

  for (i = 0;
       i < sizeof (to_unicode_vectors) / sizeof (to_unicode_vectors[0]);
       ++i)
    check_to_unicode_lzlz_success (to_unicode_vectors[i].input,
                                   to_unicode_vectors[i].expected);

  for (i = 0; i < sizeof (failure_vectors) / sizeof (failure_vectors[0]); ++i)
    check_to_unicode_lzlz_failure (failure_vectors[i].input,
                                   failure_vectors[i].expected_rc);
}

/* NULL-terminated list of locale names that main tries in order.
   Names that setlocale rejects are skipped, as are locales whose
   charset is classified as charset_neither.  */
static const char *const locale_candidates[] = {
  "C",
  "C.UTF-8",
  "en_US",
  "en_US.utf8",
  "en_US.iso88591",
  "de_DE",
  "de_DE.utf8",
  "de_DE.iso88591",
  "de_DE.iso885915@euro",
  "fr_FR",
  "fr_FR.utf8",
  "fr_FR.iso88591",
  "he_IL.utf8",
  NULL
};

int
main (void)
{
  bool utf8_seen = false;
  bool latin1_seen = false;

  for (size_t i = 0; locale_candidates[i] != NULL; ++i)
    {
      locale = locale_candidates[i];
      if (setlocale (LC_ALL, locale) == NULL)
	continue;

      switch (determine_current_charset_kind ())
	{
	case charset_utf8:
	  run_utf8_tests ();
	  utf8_seen = true;
	  break;
	case charset_latin1:
	  run_latin1_tests ();
	  latin1_seen = true;
	  break;
	case charset_neither:
	  continue;
	}
    }

  if (!utf8_seen)
    {
      /* Mingw64 does not have a UTF-8 locale.  */
#ifndef __MINGW64__
      puts ("error: no UTF-8 locale found");
      ++errors;
#else
      puts ("warning: no UTF-8 support on Mingw");
#endif
    }

  /* Not everyone has a Latin-1 locale installed.  */
  if (!latin1_seen)
    puts ("warning: no Latin-1 locale found");

  if (!(utf8_seen || latin1_seen))
    {
      puts ("error: no usable locales found");
      ++errors;
    }

  if (errors)
    return EXIT_FAILURE;
  else
    return EXIT_SUCCESS;
}