File: mbs_endswith.c

package info (click to toggle)
coreutils 9.10-1
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 70,560 kB
  • sloc: ansic: 253,546; sh: 30,931; perl: 8,141; yacc: 1,846; makefile: 198; python: 47; sed: 16
file content (74 lines) | stat: -rw-r--r-- 2,705 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/* mbs_endswith function.
   Copyright (C) 2025-2026 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */

#include <config.h>

/* Specification.  */
#include <string.h>

#include "mbiter.h"

/* Report whether the multibyte string STRING ends with SUFFIX, where the
   match has to start on a multibyte character boundary of STRING.
   The sizes of both arguments are taken with strlen.  */
bool
mbs_endswith (const char *string, const char *suffix)
{
  /* Fast path, based on how the supported multibyte encodings lay out
     their bytes:

       Encoding    First byte of character    Which of these can occur
                                              as second or later byte?

        EUC-JP     0x00..0x7F, 0x8E..0xFE     0xA1..0xFE
        EUC-KR     0x00..0x7F, 0xA1..0xFD     0xA1..0xFD
        GB2312     0x00..0x7F, 0xA1..0xF7     0xA1..0xF7
        EUC-TW     0x00..0x7F, 0x8E..0xFD     0xA1..0xFD
        BIG5       0x00..0x7F, 0xA1..0xF9     0x40..0x7E, 0xA1..0xF9
        GB18030    0x00..0x7F, 0x81..0xFE     0x30..0x39, 0x40..0x7E, 0x81..0xFE
        SJIS       0x00..0x7F, 0x81..0xF9     0x40..0x7E, 0x81..0xF9
        UTF-8      0x00..0x7F, 0xC2..0xF7     none

     A byte below 0x30 never shows up as the second or a later byte of a
     character, so when SUFFIX starts with such a byte (this includes the
     empty SUFFIX, whose first byte is NUL) a bytewise comparison is
     sufficient.  */
  if ((unsigned char) suffix[0] < 0x30)
    return str_endswith (string, suffix);

  /* From here on, SUFFIX is known to be non-empty.  */

  /* A suffix that has more bytes than STRING can never match.  */
  size_t string_bytes = strlen (string);
  if (string_bytes < strlen (suffix))
    return false;

  /* Neither can a suffix that has more multibyte characters.  */
  size_t string_chars = mbslen (string);
  size_t suffix_chars = mbslen (suffix);
  if (string_chars < suffix_chars)
    return false;

  /* Step over the leading (string_chars - suffix_chars) multibyte
     characters of STRING.  */
  mbi_iterator_t cursor;
  mbi_init (cursor, string, string_bytes);
  for (size_t to_skip = string_chars - suffix_chars; to_skip > 0; to_skip--)
    {
      if (!mbi_avail (cursor))
        /* Incomplete multibyte characters can bring us here.  */
        return false;
      mbi_advance (cursor);
    }

  if (!mbi_avail (cursor))
    /* Incomplete multibyte characters can bring us here.  */
    return false;

  /* What remains of STRING must be exactly SUFFIX.  */
  return streq (mbi_cur_ptr (cursor), suffix);
}