File: utf8_chset.c

package info (click to toggle)
cone 0.75-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 31,040 kB
  • ctags: 13,930
  • sloc: ansic: 90,648; cpp: 79,781; sh: 18,355; perl: 3,218; makefile: 1,611; yacc: 289; sed: 16
file content (88 lines) | stat: -rw-r--r-- 1,668 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/*
** Copyright 2000 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: utf8_chset.c,v 1.5 2004/05/23 14:28:25 mrsam Exp $
*/

#include "unicode.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
** UTF-8 toupper/tolower/totitle are implemented by converting UTF-8 to
** UCS-4, applying the Unicode table lookup to each character, then
** converting the result back to UTF-8.
*/

/*
** Uppercase conversion for UTF-8 text.
**
** cp is handed to unicode_utf8_tou() together with ip; every element of
** the resulting unicode_char array, up to the terminating zero, is
** replaced by unicode_uc() of itself, and the array is then passed to
** unicode_utf8_fromu() to produce the returned string.
**
** u is not used.  ip is only forwarded to unicode_utf8_tou().
**
** Returns a null pointer if unicode_utf8_tou() returns one; otherwise
** returns whatever unicode_utf8_fromu() returns.  The intermediate array
** is always freed here.  NOTE(review): the returned string is presumably
** owned by the caller -- confirm against unicode_utf8_fromu().
*/
static char *toupper_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide=unicode_utf8_tou(cp, ip);
	unicode_char *cur;
	char *result;

	if (wide == NULL)
		return (NULL);

	cur=wide;
	while (*cur)
	{
		*cur=unicode_uc(*cur);
		++cur;
	}

	/* No error reporting is requested for the conversion back. */
	result=unicode_utf8_fromu(wide, NULL);
	free(wide);
	return (result);
}

/*
** Lowercase conversion for UTF-8 text.
**
** cp is handed to unicode_utf8_tou() together with ip; every element of
** the resulting unicode_char array, up to the terminating zero, is
** replaced by unicode_lc() of itself, and the array is then passed to
** unicode_utf8_fromu() to produce the returned string.
**
** u is not used.  ip is only forwarded to unicode_utf8_tou().
**
** Returns a null pointer if unicode_utf8_tou() returns one; otherwise
** returns whatever unicode_utf8_fromu() returns.  The intermediate array
** is always freed here.  NOTE(review): the returned string is presumably
** owned by the caller -- confirm against unicode_utf8_fromu().
*/
static char *tolower_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide=unicode_utf8_tou(cp, ip);
	unicode_char *cur;
	char *result;

	if (wide == NULL)
		return (NULL);

	cur=wide;
	while (*cur)
	{
		*cur=unicode_lc(*cur);
		++cur;
	}

	/* No error reporting is requested for the conversion back. */
	result=unicode_utf8_fromu(wide, NULL);
	free(wide);
	return (result);
}

/*
** Titlecase conversion for UTF-8 text.
**
** cp is handed to unicode_utf8_tou() together with ip; every element of
** the resulting unicode_char array, up to the terminating zero, is
** replaced by unicode_tc() of itself, and the array is then passed to
** unicode_utf8_fromu() to produce the returned string.
**
** u is not used.  ip is only forwarded to unicode_utf8_tou().
**
** Returns a null pointer if unicode_utf8_tou() returns one; otherwise
** returns whatever unicode_utf8_fromu() returns.  The intermediate array
** is always freed here.  NOTE(review): the returned string is presumably
** owned by the caller -- confirm against unicode_utf8_fromu().
*/
static char *totitle_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide=unicode_utf8_tou(cp, ip);
	unicode_char *cur;
	char *result;

	if (wide == NULL)
		return (NULL);

	cur=wide;
	while (*cur)
	{
		*cur=unicode_tc(*cur);
		++cur;
	}

	/* No error reporting is requested for the conversion back. */
	result=unicode_utf8_fromu(wide, NULL);
	free(wide);
	return (result);
}

/*
** Adapter with the struct unicode_info-style signature: forwards p and
** err unchanged to unicode_utf8_tou() and returns its result.  The
** descriptor argument i is not used.
*/
static unicode_char *tou(const struct unicode_info *i, const char *p,
			 int *err)
{
	unicode_char *decoded;

	decoded=unicode_utf8_tou(p, err);
	return decoded;
}

/*
** Adapter with the struct unicode_info-style signature: forwards p and
** err unchanged to unicode_utf8_fromu() and returns its result.  The
** descriptor argument i is not used.
*/
static char *fromu(const struct unicode_info *i, const unicode_char *p,
		   int *err)
{
	char *encoded;

	encoded=unicode_utf8_fromu(p, err);
	return encoded;
}

/*
** Descriptor object for the UTF-8 character set.
**
** The initializers are positional, so their order must match the member
** order of struct unicode_info, which is declared outside this file
** (presumably in unicode.h -- confirm there before reordering anything).
*/
const struct unicode_info unicode_UTF8 = {
	/* Presumably the character set name -- confirm against the struct. */
	"UTF-8",
	/* Flag bits OR-ed together; their meanings are defined elsewhere. */
	UNICODE_UTF | UNICODE_MB | UNICODE_USASCII | UNICODE_HEADER_QUOPRI
	| UNICODE_BODY_QUOPRI,
	/* The five static functions defined above in this file. */
	tou,
	fromu,
	toupper_func,
	tolower_func,
	totitle_func};