File: charset.c

package info (click to toggle)
newmail 0.5-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 200 kB
  • sloc: ansic: 781; makefile: 17
file content (107 lines) | stat: -rw-r--r-- 2,599 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
    Copyright (c) 2006,8  Joey Schulze <joey@infodrom.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <errno.h>
#include <iconv.h>
#include <langinfo.h>
#include <locale.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/*
 * Name of the output character set.
 *
 * NULL until set_charset() has stored a value (or when its allocation
 * failed); convert_word() then copies text through unconverted.
 */
char *charset = NULL;

/*
 * Determine the output character set
 *
 * Activates the LC_CTYPE category of the environment's locale and stores
 * a private copy of its codeset name in `charset'.  On allocation failure
 * `charset' is set to NULL.
 *
 * A previously stored value is not released here, since this function
 * cannot tell how it was obtained; it is meant to be called once.
 */
void set_charset(void)
{
  const char *codeset;
  size_t size;

  setlocale (LC_CTYPE, "");

  codeset = nl_langinfo (CODESET);
  size = strlen (codeset) + 1;

  charset = malloc (size);
  if (charset)
    memcpy (charset, codeset, size);
}

/*
 * Copy `src' into `dst', which holds `size' (> 0) bytes, truncating when
 * necessary and always NUL-terminating.  The two regions may overlap.
 */
static char *copy_truncated(char *dst, const char *src, size_t size)
{
  size_t len = strlen (src);

  if (len >= size)
    len = size - 1;

  memmove (dst, src, len);
  dst[len] = '\0';

  return dst;
}

/*
 * Convert a word from an arbitrary charset into the output character set
 *
 *   encoding      name of the charset `inbuf' is encoded in
 *   inbuf         NUL-terminated input text
 *   outbuf        buffer receiving the NUL-terminated result
 *   outbytesleft  total size of `outbuf' in bytes, terminator included
 *
 * Returns `outbuf'.  The result is truncated to fit and is always
 * NUL-terminated, except when `outbytesleft' is 0, in which case nothing
 * is written at all.
 *
 * No conversion is performed when both charsets are equal (compared
 * case-insensitively), when no output charset has been set, when
 * `encoding' is NULL, or when iconv does not know the requested
 * conversion; the input bytes are then copied verbatim.
 *
 * Input bytes that iconv rejects with EILSEQ are replaced by '?', one
 * output character per skipped input byte.
 */
char *convert_word(const char *encoding, char *inbuf, char *outbuf, size_t outbytesleft)
{
  iconv_t cd;
  char *inptr, *outptr;
  size_t inbytesleft;
  size_t nconv;

  /* Without any room not even the terminator can be stored */
  if (!outbuf || outbytesleft == 0)
    return outbuf;

  if (!inbuf) {
    outbuf[0] = '\0';
    return outbuf;
  }

  if (!charset || !encoding || !strcasecmp (encoding, charset))
    return copy_truncated (outbuf, inbuf, outbytesleft);

  cd = iconv_open (charset, encoding);

  /* Unknown charset pair: raw bytes are the best that can be offered */
  if (cd == (iconv_t) -1)
    return copy_truncated (outbuf, inbuf, outbytesleft);

  /* The terminator is converted along with the text */
  inbytesleft = strlen (inbuf) + 1;
  inptr = inbuf;
  outptr = outbuf;

  /*
   * Keep the last byte back from iconv so that a terminator can always
   * be stored inside the buffer when the conversion stops early.
   */
  outbytesleft--;

  for (;;) {
    nconv = iconv (cd, &inptr, &inbytesleft, &outptr, &outbytesleft);

    if (nconv != (size_t) -1)
      break;

    /* Only invalid input is recoverable, and only while room is left */
    if (errno != EILSEQ || outbytesleft == 0)
      break;

    /* Substitute the offending byte and resume right behind it */
    *outptr++ = '?';
    outbytesleft--;
    inptr++;
    inbytesleft--;
  }

  iconv_close (cd);

  /* An aborted conversion never got as far as the input's terminator */
  if (nconv == (size_t) -1)
    *outptr = '\0';

  return outbuf;
}


/*
 * Needs to be called with LANG=de_DE.ISO-8859-1

void test_charset()
{
  char outbuf[100];
  size_t size = 99;

  memset (outbuf, 0, sizeof (outbuf));
  printf ("%s\n", convert_word ("UTF-8", "für ein", outbuf, size));
  printf ("%s\n", outbuf);
  if (!strcmp(outbuf, "f\xfcr ein"))
    printf ("charset.c: test passed\n");
  else
    printf ("charset.c: test failed\n");
}

*/