File: virt_mbrlen.c

package info (click to toggle)
virtuoso-opensource 6.1.6%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 260,060 kB
  • ctags: 123,765
  • sloc: ansic: 652,532; sql: 458,419; xml: 282,834; java: 61,031; sh: 40,031; cpp: 36,890; cs: 25,240; php: 12,692; yacc: 9,523; lex: 7,018; makefile: 6,157; jsp: 4,484; awk: 1,643; perl: 1,013; ruby: 1,003; python: 326
file content (109 lines) | stat: -rw-r--r-- 3,071 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
 *  virt_mbrlen.c
 *
 *  Platform-independent clone of mbrlen - determine number of bytes in next multibyte character
 *
 *  $Id$
 *
 *  This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
 *  project.
 *
 *  Copyright (C) 1998-2012 OpenLink Software
 *
 *  This project is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; only version 2 of the License, dated June 1991.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include "libutil.h"

/*
 *  virt_mbrlen
 *
 *  Examines at most n bytes starting at s and determines how many of them
 *  are needed to complete the next UTF-8 encoded character.  A sequence
 *  that is split across calls is tracked in *ps (ps->count holds the number
 *  of continuation bytes still expected, ps->value the bits collected so
 *  far).
 *
 *  s  - bytes to examine; NULL is treated as the one-byte string "".
 *  n  - maximum number of bytes of s to examine.
 *  ps - conversion state carried between calls; when NULL, a single static
 *       state object shared by all such calls is used.
 *
 *  Returns
 *    the number of bytes consumed by this call when a character is completed,
 *    0 when the completed character has the value zero,
 *    (size_t) -1 when a byte cannot be part of a valid sequence,
 *    (size_t) -2 when the examined bytes end before the character is
 *                complete (this includes n == 0).
 */
size_t
virt_mbrlen (const char *s, size_t n, virt_mbstate_t *ps)
{
  size_t used = 0;
  static virt_mbstate_t internal;

  if (ps == NULL)
    ps = &internal;

  if (s == NULL)
    {
      s = "";
      n = 1;
    }

  /* Nothing to look at: the character (if any) is still incomplete.  */
  if (n == 0)
    return (size_t) -2;

  if (ps->count == 0)
    {
      unsigned char byte = (unsigned char) *s++;
      ++used;

      /* We must look for a possible first byte of a UTF8 sequence.  */
      if (!(byte & 0x80))
        return byte ? used : 0;
      /* A continuation byte or 0xfe/0xff can never start a sequence.
         The state is still initial here, so nothing has to be undone.  */
      if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe)
        return (size_t) -1;
      if ((byte & 0xe0) == 0xc0)
        {
          /* We expect two bytes.  */
          ps->count = 1;
          ps->value = byte & 0x1f;
        }
      else if ((byte & 0xf0) == 0xe0)
        {
          /* We expect three bytes.  */
          ps->count = 2;
          ps->value = byte & 0x0f;
        }
      else if ((byte & 0xf8) == 0xf0)
        {
          /* We expect four bytes.  */
          ps->count = 3;
          ps->value = byte & 0x07;
        }
      else if ((byte & 0xfc) == 0xf8)
        {
          /* We expect five bytes.  */
          ps->count = 4;
          ps->value = byte & 0x03;
        }
      else
        {
          /* We expect six bytes.  */
          ps->count = 5;
          ps->value = byte & 0x01;
        }
    }

  /* We know we have to handle a multibyte character and there are
     some more bytes to read.  */
  while (used < n)
    {
      /* The second to sixth bytes must be of the form 10xxxxxx.  */
      unsigned char byte = (unsigned char) *s++;
      ++used;

      if ((byte & 0xc0) != 0x80)
        {
          /* Drop the partial sequence.  Otherwise the state (in particular
             the shared static one) would stay mid-sequence and every
             following call would fail as well.  */
          ps->count = 0;
          ps->value = 0;
          return (size_t) -1;
        }
      ps->value <<= 6;
      ps->value |= byte & 0x3f;
      if (--ps->count == 0)
        {
          return ps->value ? used : 0;
        }
    }

  /* Ran out of input before the sequence was complete; *ps remembers how
     far we got.  */
  return (size_t) -2;
}