File: string_splitter.h

package info (click to toggle)
chromium 138.0.7204.157-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,071,864 kB
  • sloc: cpp: 34,936,859; ansic: 7,176,967; javascript: 4,110,704; python: 1,419,953; asm: 946,768; xml: 739,967; pascal: 187,324; sh: 89,623; perl: 88,663; objc: 79,944; sql: 50,304; cs: 41,786; fortran: 24,137; makefile: 21,806; php: 13,980; tcl: 13,166; yacc: 8,925; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (118 lines) | stat: -rw-r--r-- 3,995 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#ifndef COMPONENTS_URL_PATTERN_INDEX_STRING_SPLITTER_H_
#define COMPONENTS_URL_PATTERN_INDEX_STRING_SPLITTER_H_

#include <iterator>
#include <string_view>

#include "base/check_op.h"
#include "base/memory/raw_ptr.h"

namespace url_pattern_index {

// A zero-allocation string splitter. Splits a string into non-empty tokens
// divided by separator characters as defined by the IsSeparator predicate.
// However, instead of materializing and returning a collection of all tokens in
// the string, it provides an InputIterator that can be used to extract the
// tokens.
//
// Lifetime: the splitter stores only a std::string_view of the text, and each
// Iterator stores a pointer back to the splitter that created it. The
// character data must therefore outlive the splitter, and the splitter must
// outlive every iterator obtained from it.
//
// TODO(pkalinnikov): Move it to "base/strings" after some generalization.
template <typename IsSeparator>
class StringSplitter {
 public:
  // Single-pass iterator over the tokens of one StringSplitter. Instances are
  // only created through StringSplitter::begin() and StringSplitter::end().
  class Iterator {
   public:
    using iterator_category = std::input_iterator_tag;
    using value_type = std::string_view;
    using difference_type = std::ptrdiff_t;
    // These aliases describe what operator->() and operator*() really return:
    // a pointer-to-const and a token by value. Keeping them in sync makes
    // std::iterator_traits<Iterator> report types the operators can produce.
    using pointer = const std::string_view*;
    using reference = std::string_view;

    // Returns true when both iterators point at the same token. Comparing
    // iterators that belong to different splitters is a programming error
    // (checked with DCHECK_EQ).
    bool operator==(const Iterator& rhs) const {
      DCHECK_EQ(splitter_, rhs.splitter_);
      // If `current_` starts at the same position, all the other locations will
      // match.
      return current_.data() == rhs.current_.data();
    }

    // Returns the current token by value. For an iterator equal to end() this
    // is an empty view.
    reference operator*() const { return current_; }
    // Gives member access to the current token.
    pointer operator->() const { return &current_; }

    // Pre-increment: moves to the next token (or to the end position).
    Iterator& operator++() {
      Advance();
      return *this;
    }

    // Post-increment: moves to the next token and returns the iterator state
    // from before the move.
    Iterator operator++(int) {
      Iterator copy(*this);
      operator++();
      return copy;
    }

   private:
    friend class StringSplitter<IsSeparator>;

    // Creates an iterator, which points to the leftmost token within
    // `remaining`, which must be a suffix of `splitter`'s `text`.
    Iterator(const StringSplitter& splitter, std::string_view remaining)
        : splitter_(&splitter), remaining_(remaining) {
      DCHECK_LE(splitter_->text_.data(), remaining_.data());
      DCHECK_EQ(splitter_->text_.data() + splitter_->text_.size(),
                remaining_.data() + remaining_.size());
      Advance();
    }

    // Skips any leading separators in `remaining_`, then makes `current_` the
    // run of non-separator characters that follows and `remaining_` everything
    // after that run. When no token is left, `current_` becomes an empty view
    // positioned at the end of the text, which is the same state the iterator
    // returned by StringSplitter::end() is in.
    void Advance() {
      const std::string_view::const_iterator text_end = remaining_.end();

      std::string_view::const_iterator begin = remaining_.begin();
      while (begin != text_end && splitter_->is_separator_(*begin)) {
        ++begin;
      }
      std::string_view::const_iterator end = begin;
      while (end != text_end && !splitter_->is_separator_(*end)) {
        ++end;
      }
      current_ = std::string_view(begin, end);
      remaining_ = std::string_view(end, text_end);
    }

    // The splitter this iterator was created from; supplies the separator
    // predicate and is used to sanity-check comparisons.
    raw_ptr<const StringSplitter<IsSeparator>> splitter_;

    // Contains the token currently pointed to by the iterator.
    std::string_view current_;
    // Contains the text that follows the current token, ending at
    // `text_.end()`.
    std::string_view remaining_;
  };

  // Constructs a splitter for iterating over non-empty tokens contained in the
  // `text`. `is_separator` predicate is used to determine whether a certain
  // character is a separator. Only a view of `text` is stored; the characters
  // themselves are not copied.
  explicit StringSplitter(std::string_view text,
                          IsSeparator is_separator = IsSeparator())
      : text_(text), is_separator_(is_separator) {}

  // Returns an iterator positioned at the first token of the text, or equal to
  // end() if the text contains no tokens.
  Iterator begin() const { return Iterator(*this, text_); }
  // Returns the past-the-last-token iterator.
  Iterator end() const { return Iterator(*this, text_.substr(text_.size())); }

 private:
  // Non-owning view of the text being split.
  std::string_view text_;
  // Predicate invoked on individual characters of `text_` to classify them as
  // separators.
  IsSeparator is_separator_;
};

// Convenience factory that builds a StringSplitter over `text` while letting
// the compiler deduce the predicate type from `is_separator` (handy for
// lambdas, whose type cannot be spelled out). The returned splitter keeps
// `text` as a std::string_view, so it does not copy the characters.
template <typename IsSeparator>
StringSplitter<IsSeparator> CreateStringSplitter(std::string_view text,
                                                 IsSeparator is_separator) {
  using Splitter = StringSplitter<IsSeparator>;
  return Splitter(text, is_separator);
}

}  // namespace url_pattern_index

#endif  // COMPONENTS_URL_PATTERN_INDEX_STRING_SPLITTER_H_