File: string_util_icu.cc

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (67 lines) | stat: -rw-r--r-- 1,873 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "device/bluetooth/string_util_icu.h"

#include <memory>

#include "base/memory/singleton.h"
#include "base/strings/string_util.h"
#include "third_party/icu/source/common/unicode/uniset.h"

namespace device {

namespace {

// Holds the ICU set of Unicode graphic characters and answers membership
// queries against it. The single instance is handed out by GetInstance(),
// which goes through base::Singleton with LeakySingletonTraits; the set itself
// is built (and frozen) by the constructor.
class GraphicCharacters {
 public:
  // Returns the shared GraphicCharacters instance.
  static GraphicCharacters* GetInstance() {
    return base::Singleton<GraphicCharacters, base::LeakySingletonTraits<
                                                  GraphicCharacters>>::get();
  }

  GraphicCharacters(const GraphicCharacters&) = delete;
  GraphicCharacters& operator=(const GraphicCharacters&) = delete;

  // Returns true if |s| contains at least one character that is a member of
  // the graphic set, and false otherwise (including for an empty |s|).
  //
  // This only reads |graphic_|, so it is const-qualified.
  bool HasGraphicCharacter(std::string_view s) const {
    // spanUTF8() takes the byte length as an int32_t, so the conversion from
    // size_t is spelled out here rather than left implicit.
    // NOTE(review): inputs of 2 GiB or more do not fit in that parameter and
    // are not handled by this call - confirm callers never pass such strings.
    const int32_t byte_length = static_cast<int32_t>(s.size());

    // With USET_SPAN_NOT_CONTAINED the result is the length of the leading
    // run of |s| made up solely of characters outside the set. If that run
    // covers the whole string, no graphic character was found.
    const int32_t non_graphic_prefix = graphic_->spanUTF8(
        s.data(), byte_length, USetSpanCondition::USET_SPAN_NOT_CONTAINED);
    return static_cast<size_t>(non_graphic_prefix) != s.size();
  }

 private:
  // Gives base::DefaultSingletonTraits access to the private constructor.
  friend struct base::DefaultSingletonTraits<GraphicCharacters>;

  GraphicCharacters();

  // Set of graphic characters; assigned in the constructor.
  std::unique_ptr<icu::UnicodeSet> graphic_;
};

// Builds the set queried by HasGraphicCharacter() from the "[:graph:]"
// pattern and freezes it.
GraphicCharacters::GraphicCharacters() {
  // "[:graph:]" selects the Unicode Graphic Characters described at
  // http://www.unicode.org/reports/tr18/#graph, i.e. every character that is
  // NOT in any of:
  //   * Whitespace (WSpace)
  //   * gc=Control (Cc)
  //   * gc=Surrogate (Cs)
  //   * gc=Unassigned (Cn)
  UErrorCode graphic_status{U_ZERO_ERROR};
  graphic_ = std::make_unique<icu::UnicodeSet>(
      UNICODE_STRING_SIMPLE("[:graph:]"), graphic_status);

  // Constructing the set from the pattern is expected to succeed.
  DCHECK(U_SUCCESS(graphic_status));

  // No further modifications are made to the set after this point.
  graphic_->freeze();
}

}  // namespace

bool HasGraphicCharacter(std::string_view s) {
  DCHECK(base::IsStringUTF8(s));
  return GraphicCharacters::GetInstance()->HasGraphicCharacter(s);
}

}  // namespace device